# Lookup table from two-letter state abbreviation to region, built from the
# base R `state.abb` / `state.region` vectors, with a hand-added row that
# assigns DC to the "South" region.
regions <- rbind(
  tibble(state_abb = state.abb, region = state.region),
  tibble(state_abb = "DC", region = "South")
)
# Load the prepped state-level "days on Zillow" workbook, then attach a
# two-letter state abbreviation and a region to every row.
days_on_zillow <- readxl::read_excel(
  here::here("data", "prepped", "days_on_zillow_state.xlsx")
) %>%
  mutate(
    date = as.Date(date),
    # RegionName is matched against lower-cased state names to look up the
    # corresponding abbreviation; names with no match become NA.
    state_abb = state.abb[match(RegionName, tolower(state.name))],
    # DC is not part of the built-in state vectors, so fill it in by hand.
    state_abb = replace(state_abb, RegionName == "district of columbia", "DC")
  ) %>%
  # Name the join key explicitly so the join does not depend on whichever
  # columns the two tables happen to share (and emits no "Joining, by" message).
  # Rows whose state_abb is not present in `regions` are dropped by the join.
  inner_join(regions, by = "state_abb")
# Peek at the last six rows of the joined table.
days_on_zillow %>% tail(n = 6)
## # A tibble: 6 x 8
## year month date days SizeRank RegionName state_abb region
## <dbl> <dbl> <date> <dbl> <dbl> <chr> <chr> <fct>
## 1 2019 7 2019-07-01 62 46 south dakota SD North Ce~
## 2 2019 7 2019-07-01 75 47 alaska AK West
## 3 2019 7 2019-07-01 79 48 north dakota ND North Ce~
## 4 2019 7 2019-07-01 71 49 vermont VT Northeast
## 5 2019 7 2019-07-01 61 50 district of co~ DC South
## 6 2019 7 2019-07-01 58 51 wyoming WY West
# Per-column summary statistics (also reports the NA count in `days`).
days_on_zillow %>% summary()
## year month date days
## Min. :2010 Min. : 1.000 Min. :2010-01-01 Min. : 42.0
## 1st Qu.:2012 1st Qu.: 3.000 1st Qu.:2012-05-01 1st Qu.: 81.0
## Median :2014 Median : 6.000 Median :2014-10-01 Median :102.0
## Mean :2014 Mean : 6.348 Mean :2014-10-01 Mean :103.5
## 3rd Qu.:2017 3rd Qu.: 9.000 3rd Qu.:2017-03-01 3rd Qu.:122.0
## Max. :2019 Max. :12.000 Max. :2019-07-01 Max. :263.0
## NA's :19
## SizeRank RegionName state_abb region
## Min. : 1 Length:5865 Length:5865 Northeast :1035
## 1st Qu.:13 Class :character Class :character South :1955
## Median :26 Mode :character Mode :character North Central:1380
## Mean :26 West :1495
## 3rd Qu.:39
## Max. :51
##
# Number of distinct dates present in the data.
n_distinct(days_on_zillow[["date"]])
## [1] 115
# Distribution of `days` per year, one box per year.
# Interactive version: one panel per region, rendered through plotly.
region_boxplot <- ggplot(
  data = days_on_zillow,
  mapping = aes(x = year, y = days, group = year)
) +
  geom_boxplot() +
  facet_wrap(facets = ~region)
ggplotly(p = region_boxplot)
## Warning: Removed 19 rows containing non-finite values (stat_boxplot).
# Static version of the same boxplot with all regions pooled together.
ggplot(
  data = days_on_zillow,
  mapping = aes(x = year, y = days, group = year)
) +
  geom_boxplot()
## Warning: Removed 19 rows containing non-finite values (stat_boxplot).

# Time series of `days`, one coloured line per state abbreviation.
# Interactive version: one panel per region, rendered through plotly.
p1 <- ggplot(
  data = days_on_zillow,
  mapping = aes(x = date, y = days, color = state_abb)
) +
  geom_line() +
  facet_wrap(facets = ~region)
ggplotly(p = p1)
# Static version of the same lines drawn in a single panel.
ggplot(
  data = days_on_zillow,
  mapping = aes(x = date, y = days, color = state_abb)
) +
  geom_line()
## Warning: Removed 19 rows containing missing values (geom_path).

# Attach state outline coordinates from map_data("state"): the map data's
# `region` column is matched against RegionName, so every row of
# days_on_zillow is repeated once per matching outline point.
days_on_zillow_geo_data <- inner_join(
  x = days_on_zillow,
  y = map_data("state"),
  by = c(RegionName = "region")
)
# Peek at the last six rows of the joined geo table.
days_on_zillow_geo_data %>% tail(n = 6)
## # A tibble: 6 x 13
## year month date days SizeRank RegionName state_abb region long
## <dbl> <dbl> <date> <dbl> <dbl> <chr> <chr> <fct> <dbl>
## 1 2019 7 2019-07-01 58 51 wyoming WY West -106.
## 2 2019 7 2019-07-01 58 51 wyoming WY West -107.
## 3 2019 7 2019-07-01 58 51 wyoming WY West -107.
## 4 2019 7 2019-07-01 58 51 wyoming WY West -108.
## 5 2019 7 2019-07-01 58 51 wyoming WY West -109.
## 6 2019 7 2019-07-01 58 51 wyoming WY West -109.
## # ... with 4 more variables: lat <dbl>, group <dbl>, order <int>,
## # subregion <chr>
# Per-column summary statistics of the joined geo table.
# Uses base summary(): dplyr::summarise() called without any summary
# expressions only returns an empty 1 x 0 tibble.
summary(days_on_zillow_geo_data)
# List the distinct dates in the geo table. The column is referenced by its
# bare name so the result column is called `date` rather than
# `days_on_zillow_geo_data$date`.
distinct(days_on_zillow_geo_data, date)
## # A tibble: 115 x 1
## date
## <date>
## 1 2010-01-01
## 2 2010-02-01
## 3 2010-03-01
## 4 2010-04-01
## 5 2010-05-01
## 6 2010-06-01
## 7 2010-07-01
## 8 2010-08-01
## 9 2010-09-01
## 10 2010-10-01
## # ... with 105 more rows
# Animated choropleth: state polygons filled by `days`, one animation state
# per value of `date`.
days_on_zillow_map <- ggplot(
  data = days_on_zillow_geo_data,
  mapping = aes(
    x = long,
    y = lat,
    fill = days,
    group = group
  )
) +
  # The outline colour has to be set on the polygon layer; passed to ggplot()
  # it lands in the unused `...` and is silently ignored.
  geom_polygon(color = "white") +
  coord_map(projection = "albers", lat0 = 39, lat1 = 45) +
  # gganimate: step through time using the `date` column.
  transition_time(date) +
  ease_aes("cubic-in-out") +
  scale_fill_viridis_c(option = "plasma") +
  # `frame_time` is supplied by transition_time() for each rendered frame.
  labs(title = "Date: {round(frame_time, 0)}") +
  theme_map()
# Render the animation with a total duration of 100 seconds.
animate(plot = days_on_zillow_map, duration = 100)

# Write the most recently rendered animation to disk as a GIF.
anim_save(filename = "days_on_zillow.gif", animation = last_animation())